/*=======================================================================================
	PFW_vs_CE.do
	
		Compare consumption response to dividend payments directly measured in the 
		Personal Finance Website (PFW) with dividend payments imputed in the same 
		fashion as in the Consumer Expenditure Survey (CE)


	Author: Lorenz Kueng, June 2015
=========================================================================================*/

* Open a fresh named log for this do-file (a stale log of the same name is closed first)
capture log close PFW_06
log using "$homedir/log-files/PFW_06_$date.log", text replace name(PFW_06)


*** Load data

use "$homedir/data/stata/PFW_quarterly.dta", clear

	* panel time variable: copy of Quarter, displayed in quarterly (%tq) format
	generate date = Quarter
	format   date %tq

	* APFD payments identified via direct deposit entry in early October
	label variable APFD "PFD payments"

	* declare the panel structure (user x quarter)
	xtset userid date
	
	
	*** Create additional variables

	* controls: for every household characteristic build a gap-filled copy
	* carrying the suffix "_":
	*   1. characteristics in the first list are turned into labeled integer
	*      codes with -encode-; those in the second list are copied as they are
	*   2. missing values are replaced by the user's modal value
	*      (maxmode: the largest mode is taken when there are ties)
	*   3. whatever is still missing afterwards is set to the code 999
	local encoded_chars residentialstatus marriagestatus
	local copied_chars  age edu profession homezipcode

	foreach v in `encoded_chars' `copied_chars' {
		display _n(1) "`v'"
		capture drop `v'_

		* 1 if the current characteristic belongs to the encode list, 0 otherwise
		local needs_encode : list v in encoded_chars
		if `needs_encode' {
			encode `v', generate(`v'_)
		}
		else {
			generate `v'_ = `v'
		}

		bysort userid (date): egen _temp = mode(`v'_), maxmode
		replace  `v'_ = _temp if `v'_==. & _temp!=. 
		replace  `v'_ = 999   if `v'_==. 
		drop _temp
	}
	
	* quarterly change in income; quarters in which the change is missing
	* are filled with the user's own mean change
	capture drop _temp
	generate D_incomeQ = D.incomeQ
	bysort userid (Quarter): egen _temp = mean(D_incomeQ)
	replace D_incomeQ = _temp if missing(D_incomeQ)
	drop _temp*
	
	* winsorized copies (suffix W) of the income and liquid-asset variables,
	* followed by a side-by-side summary of raw and winsorized versions
	* NOTE(review): the W copies are not referenced again in this file
	foreach v in incomeQ D_incomeQ incomeY liquid_asset1 {
		winsor `v', generate(`v'W) p(0.01)
		summarize `v' `v'W, detail
	}
	
	** total expenditures as a measure of permanent income
	** (excluding uncategorized transactions, i.e. only nondurables, services, durables)
	capture drop totexp1_annual
	bysort userid: egen totexp1_annual = mean(totexp1)   // user-level mean of totexp1 over all of the user's observations
	replace totexp1_annual = -totexp1_annual             // flip the sign (expenditures are presumably stored as negative amounts)
	replace totexp1_annual =  4*totexp1_annual           // quarterly mean -> annual amount
	label variable totexp1_annual "annualized total expenditures (totexp1)"
	summarize totexp1_annual, detail
	winsor    totexp1_annual, generate(totexp1_annualW) p(0.01)
	summarize totexp1_annual totexp1_annualW, detail



	*------------------------------------------------------------
	* Impute dividend payments as in the CE
	*------------------------------------------------------------
	*
	* NOTE: The CE does not record the dividend itself, hence we cannot condition
	*       on households that receive a dividend! (Also, we have to use the
	*       self-reported family size (variable family_sizeImp) instead of the
	*       number of checks used in the variable NumbChecks.)
	*

		* imputed payment = PFD x self-reported family size x Alaska
		* (Alaska is presumably a 0/1 state indicator -- confirm in the data)
		generate PFDShockAlaska = PFD * family_sizeImp * Alaska
		label variable PFDShockAlaska "PFD x family size"


	*-----------------------------------------------------------
	* Winsorize dependent variable at 1%
	*-----------------------------------------------------------

		global depvar = "nondur_serv"

		xtset userid date

		* flip the sign so that expenditures are positive
		replace ${depvar} = -${depvar}

		* keep a copy of the (sign-flipped) variable before winsorizing
		capture drop original
		generate original = ${depvar}

		* distribution before winsorizing: all observations, then non-zero only
		summarize ${depvar}, detail
		summarize ${depvar} if ${depvar}!=0, detail

		* Winsorize at 1%. With fewer than 5000 zero observations the full sample
		* is used; otherwise (disaggregated variables) only the non-zero entries
		* are winsorized.
		count if ${depvar}==0
		local n_zero = r(N)

		local nonzero_only ""
		if `n_zero'>=5000 {
			local nonzero_only "if ${depvar}!=0"
		}

		capture drop temp
		winsor ${depvar} `nonzero_only', generate(temp) p(0.01)
		* observations not covered by winsor get the value zero
		* NOTE(review): this also turns missing values of the dependent variable into zeros
		replace temp = 0 if temp==.
		replace ${depvar} = temp

		* distribution after winsorizing
		summarize ${depvar}, detail
		summarize ${depvar} if ${depvar}!=0, detail

		* make all values positive (i.e., expenditures)
		replace $depvar = abs(${depvar})
		summarize ${depvar}, detail

		xtset userid date

		
		* calculate after-tax income

		* all taxes except sales taxes (cat1904), consistent with the CEX after-tax income definition
		generate ttax = -(cat19 + cat1901 + cat1902 + cat1903 + cat1905)

		* replace the tax variable by its total per user and year
		capture drop _temp
		egen _temp = total(ttax), by(userid year)
		replace  ttax = _temp
		drop _temp

		* winsorize income and taxes at 1% (in place) before subtracting
		foreach v in incomeY ttax {
			winsor `v', generate(_temp) p(0.01)
			summarize `v' _temp, detail
			replace `v' = _temp
			drop _temp
		}

		* In principle we would need to subtract taxes paid in the next year (assuming
		* no estimated taxes), but given the short panel we would lose too many
		* observations. Moreover, since everything is linear, the mean won't be affected.
		generate incomeYat = incomeY - ttax
		label variable incomeYat "annual after-tax income"
		summarize ttax incomeY incomeYat

		* rescale to units of 100,000 to increase the size of the coefficient
		replace incomeYat = incomeYat/100000
		
		
		

	* Target file of the table. The outreg2 calls that follow do not specify
	* -replace-, so any earlier version of the table is deleted first.
	global file "$homedir/results/tables/Table3_PFW_vs_CE_quarterly_${date}"
	capture erase "${file}.xls"
	capture erase "${file}.txt"


	*------------------------------------------------
	* PF shocks measured in PFW data
	*------------------------------------------------

		* APFD payments identified via direct deposit entry in early October
		label variable APFD "PFD payments"

		* controls: liquidity constraints (low liquid assets and income 'shocks')
		*           and being poor (low current and low permanent income)
		local controls liquid_asset1 D_incomeQ incomeY totexp1_annual
		local fixedeff date Alaska family_sizeImp age_ edu_ marriagestatus_ residentialstatus_ profession_ homezipcode_

		* NOTE: In CE we don't observe the dividend, hence in principle we cannot
		*       condition on households that receive a dividend! However, the results
		*       are very similar, so I leave the conditional results with the same
		*       PFW sample as in all other tables.
		reghdfe D.${depvar} APFD `controls' ///
			if APFDid_annual!=. & liquid_asset1!=. ///
			, absorb(`fixedeff') vce(cluster userid)

			* first column of the table; only the coefficient on APFD is reported
			outreg2 using "${file}.xls", ///
				alpha(0.01, 0.05, 0.1) symbol(***,**,*) bdec(3) se nocons label ///
				addstat(Number of clusters,e(N_clust)) ///
				ctitle("PFW dividend") ///
				addtext( ///
						" - Time FE"                        , YES, ///
						" - State FE"                       , YES, ///
						" - Family size FE"                 , YES, ///
						" - Income"                         , YES, ///
						" - Liquid assets"                  , YES, ///
						" - Household characteristics"      , YES ///
				) ///
				keep( APFD )


		** Interact with income to calculate sample-composition-adjusted MPC

		* incomeYat is measured in units of 100,000 (rescaled above), so the
		* interaction coefficient is the change in the MPC per 100,000 of income
		generate APFDxInc = APFD * incomeYat

		* The dollar sign must be escaped with a backslash: inside double quotes an
		* unescaped dollar sign followed by digits is expanded as an (undefined)
		* global macro, which silently cuts the label off after "income/". The label
		* is printed in the table because outreg2 is called with the -label- option.
		label variable APFDxInc "PFD payments x income/\$100000"

		local controls liquid_asset1 D_incomeQ incomeY totexp1_annual
		local fixedeff date Alaska family_sizeImp age_ edu_ marriagestatus_ residentialstatus_ profession_ homezipcode_

		reghdfe D.${depvar} APFD APFDxInc incomeYat `controls' ///
			if APFDid_annual!=. & liquid_asset1!=. ///
			, absorb(`fixedeff') vce(cluster userid)

			* income at which the MPC is evaluated:
			* median Alaskan after-tax income 2010-14 in the ACS (USD 72,000)
			local acs_income 72000

			local    ImpliedMPCinACS = _b[APFD]+_b[APFDxInc]*`acs_income'/100000
			display `ImpliedMPCinACS'
			
			* lincom must stay directly in front of outreg2: the table statistics
			* below are read from r(estimate) and r(se)
			lincom APFD + APFDxInc*`acs_income'/100000
			
			* NOTE(review): the addstat text says "average CE income" although this
			* column is evaluated at the ACS median income. The text is left unchanged
			* here because it determines the row name in the table -- confirm the
			* intended wording.
			outreg2 using "${file}.xls", ///
				alpha(0.01, 0.05, 0.1) symbol(***,**,*) bdec(3) se nocons label ///
				addstat(Number of clusters,e(N_clust),predicted MPC using average CE income,`r(estimate)',standard error,`r(se)') ///
				ctitle("ACS population MPC") ///
				addtext( ///
						" - Time FE"                        , YES, ///
						" - State FE"                       , YES, ///
						" - Family size FE"                 , YES, ///
						" - Income"                         , YES, ///
						" - Liquid assets"                  , YES, ///
						" - Household characteristics"      , YES ///
				) ///
				keep( APFD APFDxInc )
		
		
		
		
	*------------------------------------------------
	* PF shocks imputed using family size as in CEX
	*------------------------------------------------

		label variable PFDShockAlaska "PFD x family size"

		* same controls, fixed effects and sample restriction as in the regression on
		* the directly measured dividend; only the shock variable is exchanged
		local controls liquid_asset1 D_incomeQ incomeY totexp1_annual
		local fixedeff date Alaska family_sizeImp age_ edu_ marriagestatus_ residentialstatus_ profession_ homezipcode_

		reghdfe D.${depvar} PFDShockAlaska `controls' ///
			if APFDid_annual!=. & liquid_asset1!=. ///
			, absorb(`fixedeff') vce(cluster userid)

			* only the coefficient on the imputed shock is reported
			outreg2 using "${file}.xls", ///
				alpha(0.01, 0.05, 0.1) symbol(***,**,*) bdec(3) se nocons label ///
				addstat(Number of clusters,e(N_clust)) ///
				ctitle("PFD imputed") ///
				addtext( ///
						" - Time FE"                        , YES, ///
						" - State FE"                       , YES, ///
						" - Family size FE"                 , YES, ///
						" - Income"                         , YES, ///
						" - Liquid assets"                  , YES, ///
						" - Household characteristics"      , YES ///
				) ///
				keep( PFDShockAlaska )


	*------------------------------------------------
	* Interact PF shock with income
	*------------------------------------------------

		* incomeYat is measured in units of 100,000 (rescaled above), so the
		* interaction coefficient is the change in the MPC per 100,000 of income
		generate PFDShockAlaskaInc = PFDShockAlaska * incomeYat

		* The dollar sign must be escaped with a backslash: inside double quotes an
		* unescaped dollar sign followed by digits is expanded as an (undefined)
		* global macro, which silently cuts the label off after "income/". The label
		* is printed in the table because outreg2 is called with the -label- option.
		label variable PFDShockAlaskaInc "PFD x family size x income/\$100000"

		local controls liquid_asset1 D_incomeQ incomeY totexp1_annual
		local fixedeff date Alaska family_sizeImp age_ edu_ marriagestatus_ residentialstatus_ profession_ homezipcode_

		reghdfe D.${depvar} PFDShockAlaska PFDShockAlaskaInc incomeYat `controls' ///
			if APFDid_annual!=. & liquid_asset1!=. ///
			, absorb(`fixedeff') vce(cluster userid)

			* income at which the MPC is evaluated:
			* average Alaskan income in the CE (USD 63,000 in local dollars)
			local ce_income 63000

			local    ImpliedMPCinCE = _b[PFDShockAlaska]+_b[PFDShockAlaskaInc]*`ce_income'/100000
			display `ImpliedMPCinCE'
			
			* lincom must stay directly in front of outreg2: the table statistics
			* below are read from r(estimate) and r(se)
			lincom PFDShockAlaska + PFDShockAlaskaInc*`ce_income'/100000
			
			* NOTE(review): the column title below is spelled "compostion"; it is left
			* unchanged here because it is part of the exported table -- correct it
			* together with any downstream use of the table.
			outreg2 using "${file}.xls", ///
				alpha(0.01, 0.05, 0.1) symbol(***,**,*) bdec(3) se nocons label ///
				addstat(Number of clusters,e(N_clust),predicted MPC using average CE income,`r(estimate)',standard error,`r(se)') ///
				ctitle("sample compostion") ///
				addtext( ///
						" - Time FE"                        , YES, ///
						" - State FE"                       , YES, ///
						" - Family size FE"                 , YES, ///
						" - Income"                         , YES, ///
						" - Liquid assets"                  , YES, ///
						" - Household characteristics"      , YES ///
				) ///
				keep( PFDShockAlaska PFDShockAlaskaInc )


	*----------------------------------
	* IV noisy with direct measure
	*----------------------------------

		* store the first difference of the dependent variable as a variable of its own
		generate D_${depvar} = D.${depvar}
		
		* The imputed shock (PFDShockAlaska) is instrumented with the directly
		* measured payment (APFD). The fixed effects of the reghdfe specifications
		* enter here as indicator variables created by the xi prefix.
		local controls liquid_asset1 D_incomeQ incomeY totexp1_annual
		local dummies  i.date i.Alaska i.family_sizeImp i.age_ i.edu_ i.marriagestatus_ i.residentialstatus_ i.profession_ i.homezipcode_

		xi: ivreg2 D_${depvar} (PFDShockAlaska = APFD) `controls' `dummies' ///
			if APFDid_annual!=. & liquid_asset1!=. ///
			, cluster(userid)
	
			* last column of the table; sortvar() fixes the row order of the coefficients
			outreg2 using "${file}.xls", ///
				alpha(0.01, 0.05, 0.1) symbol(***,**,*) bdec(3) se nocons label ///
				addstat(Number of clusters,e(N_clust)) ///
				ctitle("IV") ///
				addtext( ///
						" - Time FE"                        , YES, ///
						" - State FE"                       , YES, ///
						" - Family size FE"                 , YES, ///
						" - Income"                         , YES, ///
						" - Liquid assets"                  , YES, ///
						" - Household characteristics"      , YES ///
				) ///
				keep( PFDShockAlaska ) ///
				sortvar( APFD ///
						 PFDShockAlaska ///
						 APFDxInc ///
						 PFDShockAlaskaInc ///
				)

	* delete the .txt file with the same name stem as the .xls table
	capture erase "${file}.txt"
	
log close PFW_06
